import pandas as pd
import pyproj
from shapely.geometry import Point
from shapely.ops import transform
import numpy as np
import plotly.express as px
df = pd.read_csv('OD_matrix_201803.csv',encoding='unicode_escape')
df.plot.scatter('X_start','Y_start')
<AxesSubplot:xlabel='X_start', ylabel='Y_start'>
It is similar to the map of Estonia, but the projection is a bit weird. At least it is not 4326 nor 3857. It is in the local projection of 3300.
df
| KANT_start | KANT_end | start_kant_id | end_kant_id | X_start | Y_start | X_end | Y_end | route_id | Population | RegularMovers | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aakre Valga | Aakre Valga | 1 | 1 | 627213.008799 | 6.440209e+06 | 627213.008799 | 6.440209e+06 | 1697 | 470 | 96 |
| 1 | Aakre Valga | Annikoru Tartu | 1 | 25 | 627213.008799 | 6.440209e+06 | 634298.193717 | 6.461856e+06 | 21178 | 470 | 1 |
| 2 | Aakre Valga | Elva linn | 1 | 49 | 627213.008799 | 6.440209e+06 | 642136.014361 | 6.456563e+06 | 41506 | 470 | 12 |
| 3 | Aakre Valga | Elva ümbruse kant Tartu | 1 | 50 | 627213.008799 | 6.440209e+06 | 639265.501560 | 6.453032e+06 | 42353 | 470 | 1 |
| 4 | Aakre Valga | Hellenurme Valga | 1 | 81 | 627213.008799 | 6.440209e+06 | 640178.641901 | 6.447022e+06 | 68610 | 470 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28504 | Ülenurme Tartu | Võnnu Tartu | 839 | 810 | 659202.938891 | 6.468463e+06 | 681155.798401 | 6.464204e+06 | 686062 | 4959 | 6 |
| 28505 | Ülenurme Tartu | Võru linn | 839 | 812 | 659202.938891 | 6.468463e+06 | 678417.528347 | 6.414736e+06 | 687756 | 4959 | 6 |
| 28506 | Ülenurme Tartu | Võõpste Tartu | 839 | 816 | 659202.938891 | 6.468463e+06 | 683260.892473 | 6.473663e+06 | 691144 | 4959 | 1 |
| 28507 | Ülenurme Tartu | Välgi Tartu | 839 | 823 | 659202.938891 | 6.468463e+06 | 669844.459691 | 6.497459e+06 | 697073 | 4959 | 1 |
| 28508 | Ülenurme Tartu | Ülenurme Tartu | 839 | 839 | 659202.938891 | 6.468463e+06 | 659202.938891 | 6.468463e+06 | 710625 | 4959 | 2073 |
28509 rows × 11 columns
len(df.KANT_start.unique())
845
len(df.KANT_end.unique())
844
set(df.KANT_start.unique()) - set(df.KANT_end.unique())
{'Vilsandi Saare'}
set(df.KANT_end.unique()) - set(df.KANT_start.unique())
set()
Then, the KANT_start.unique() has all the locaions we need to transform their projection.
start = df.groupby('KANT_start').first()
start
| KANT_end | start_kant_id | end_kant_id | X_start | Y_start | X_end | Y_end | route_id | Population | RegularMovers | |
|---|---|---|---|---|---|---|---|---|---|---|
| KANT_start | ||||||||||
| Aakre Valga | Aakre Valga | 1 | 1 | 627213.008799 | 6.440209e+06 | 627213.008799 | 6.440209e+06 | 1697 | 470 | 96 |
| Aardla Tartu | Aardla Tartu | 2 | 2 | 664206.080560 | 6.466176e+06 | 664206.080560 | 6.466176e+06 | 2545 | 1087 | 239 |
| Aarna Põlva | Aarna Põlva | 3 | 3 | 674944.509786 | 6.441704e+06 | 674944.509786 | 6.441704e+06 | 3393 | 243 | 19 |
| Aaspere Lääne-Viru | Aaspere Lääne-Viru | 4 | 4 | 620838.092143 | 6.592462e+06 | 620838.092143 | 6.592462e+06 | 4241 | 441 | 173 |
| Abja Viljandi | Abja-Paluoja linn | 6 | 5 | 579178.093070 | 6.441166e+06 | 579742.050531 | 6.444023e+06 | 5090 | 564 | 154 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Ääsmäe Harju | Aruküla Harju | 837 | 34 | 529187.870955 | 6.566963e+06 | 561079.126772 | 6.580363e+06 | 29635 | 1266 | 2 |
| Õisu Viljandi | Abja-Paluoja linn | 832 | 5 | 588943.147572 | 6.452916e+06 | 579742.050531 | 6.444023e+06 | 5914 | 455 | 19 |
| Õru Valga | Aakre Valga | 833 | 1 | 627647.829537 | 6.423045e+06 | 627213.008799 | 6.440209e+06 | 2527 | 354 | 1 |
| Üdruma Lääne | Haapsalu linn | 838 | 61 | 503259.157896 | 6.517914e+06 | 473490.460954 | 6.532856e+06 | 52505 | 133 | 9 |
| Ülenurme Tartu | Aakre Valga | 839 | 1 | 659202.938891 | 6.468463e+06 | 627213.008799 | 6.440209e+06 | 2533 | 4959 | 1 |
845 rows × 10 columns
def transformer(x,y):
EPSG_3300 = pyproj.CRS('EPSG:3300')
EPSG_4326 = pyproj.CRS('EPSG:4326')
project = pyproj.Transformer.from_crs(EPSG_3300, EPSG_4326, always_xy=True).transform
EPSG_4326_point = transform(project, Point(x,y))
return (EPSG_4326_point.x,EPSG_4326_point.y)
start_locations = list(map(transformer,start.X_start,start.Y_start))
start_locations_array = np.array(start_locations)
start_locations_array
lngs = start_locations_array[:,0]
lats = start_locations_array[:,1]
start_points = dict(zip(start.index.values,start_locations))
stdf = pd.DataFrame(zip(start.index.values,lngs,lats),columns=['name','lng','lat'])
stdf
| name | lng | lat | |
|---|---|---|---|
| 0 | Aakre Valga | 26.156788 | 58.084661 |
| 1 | Aardla Tartu | 26.801715 | 58.305377 |
| 2 | Aarna Põlva | 26.966251 | 58.081711 |
| 3 | Aaspere Lääne-Viru | 26.130749 | 59.452678 |
| 4 | Abja Viljandi | 25.342999 | 58.104500 |
| ... | ... | ... | ... |
| 840 | Ääsmäe Harju | 24.511392 | 59.240014 |
| 841 | Õisu Viljandi | 25.513075 | 58.208101 |
| 842 | Õru Valga | 26.154810 | 57.930509 |
| 843 | Üdruma Lääne | 24.056365 | 58.800669 |
| 844 | Ülenurme Tartu | 26.718025 | 58.327739 |
845 rows × 3 columns
The coordinate should be in this box:
24.9118,59.4768;24.5989,59.3643
tallinn = stdf[(stdf.lng < 24.9118 ) & (stdf.lng > 24.5989 ) & (stdf.lat < 59.4768) & (stdf.lat > 59.3643)]
df
| KANT_start | KANT_end | start_kant_id | end_kant_id | X_start | Y_start | X_end | Y_end | route_id | Population | RegularMovers | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aakre Valga | Aakre Valga | 1 | 1 | 627213.008799 | 6.440209e+06 | 627213.008799 | 6.440209e+06 | 1697 | 470 | 96 |
| 1 | Aakre Valga | Annikoru Tartu | 1 | 25 | 627213.008799 | 6.440209e+06 | 634298.193717 | 6.461856e+06 | 21178 | 470 | 1 |
| 2 | Aakre Valga | Elva linn | 1 | 49 | 627213.008799 | 6.440209e+06 | 642136.014361 | 6.456563e+06 | 41506 | 470 | 12 |
| 3 | Aakre Valga | Elva ümbruse kant Tartu | 1 | 50 | 627213.008799 | 6.440209e+06 | 639265.501560 | 6.453032e+06 | 42353 | 470 | 1 |
| 4 | Aakre Valga | Hellenurme Valga | 1 | 81 | 627213.008799 | 6.440209e+06 | 640178.641901 | 6.447022e+06 | 68610 | 470 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28504 | Ülenurme Tartu | Võnnu Tartu | 839 | 810 | 659202.938891 | 6.468463e+06 | 681155.798401 | 6.464204e+06 | 686062 | 4959 | 6 |
| 28505 | Ülenurme Tartu | Võru linn | 839 | 812 | 659202.938891 | 6.468463e+06 | 678417.528347 | 6.414736e+06 | 687756 | 4959 | 6 |
| 28506 | Ülenurme Tartu | Võõpste Tartu | 839 | 816 | 659202.938891 | 6.468463e+06 | 683260.892473 | 6.473663e+06 | 691144 | 4959 | 1 |
| 28507 | Ülenurme Tartu | Välgi Tartu | 839 | 823 | 659202.938891 | 6.468463e+06 | 669844.459691 | 6.497459e+06 | 697073 | 4959 | 1 |
| 28508 | Ülenurme Tartu | Ülenurme Tartu | 839 | 839 | 659202.938891 | 6.468463e+06 | 659202.938891 | 6.468463e+06 | 710625 | 4959 | 2073 |
28509 rows × 11 columns
sample_od = df[(df.KANT_start.isin(tallinn.name)) & (df.KANT_end.isin(tallinn.name))]
sample_od
| KANT_start | KANT_end | start_kant_id | end_kant_id | X_start | Y_start | X_end | Y_end | route_id | Population | RegularMovers | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1857 | Haabersti linnaosa | Haabersti linnaosa | 840 | 840 | 535384.478081 | 6.587643e+06 | 535384.478081 | 6.587643e+06 | 711473 | 44344 | 23355 |
| 1880 | Haabersti linnaosa | Kesklinna linnaosa | 840 | 845 | 535384.478081 | 6.587643e+06 | 543405.459220 | 6.588251e+06 | 715708 | 44344 | 6609 |
| 1896 | Haabersti linnaosa | Kristiine linnaosa | 840 | 841 | 535384.478081 | 6.587643e+06 | 540384.878201 | 6.586570e+06 | 712320 | 44344 | 2841 |
| 1911 | Haabersti linnaosa | Lasnamäe linnaosa | 840 | 847 | 535384.478081 | 6.587643e+06 | 548341.135208 | 6.588755e+06 | 717402 | 44344 | 1931 |
| 1924 | Haabersti linnaosa | Mustamäe linnaosa | 840 | 842 | 535384.478081 | 6.587643e+06 | 538422.923591 | 6.585222e+06 | 713167 | 44344 | 3304 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17477 | Põhja-Tallinna linnaosa | Mustamäe linnaosa | 844 | 842 | 539835.733135 | 6.590848e+06 | 538422.923591 | 6.585222e+06 | 713171 | 58057 | 2180 |
| 17485 | Põhja-Tallinna linnaosa | Nõmme linnaosa | 844 | 843 | 539835.733135 | 6.590848e+06 | 538867.807140 | 6.582028e+06 | 714018 | 58057 | 768 |
| 17497 | Põhja-Tallinna linnaosa | Peetri Harju | 844 | 480 | 539835.733135 | 6.590848e+06 | 547720.494583 | 6.583147e+06 | 406557 | 58057 | 445 |
| 17500 | Põhja-Tallinna linnaosa | Pirita linnaosa | 844 | 846 | 539835.733135 | 6.590848e+06 | 548907.034349 | 6.593027e+06 | 716559 | 58057 | 343 |
| 17504 | Põhja-Tallinna linnaosa | Põhja-Tallinna linnaosa | 844 | 844 | 539835.733135 | 6.590848e+06 | 539835.733135 | 6.590848e+06 | 714865 | 58057 | 33294 |
81 rows × 11 columns
def substiute(st,nd):
st_lng = start_points[st][0]
st_lat = start_points[st][1]
nd_lng = start_points[nd][0]
nd_lat = start_points[nd][1]
return st_lng, nd_lng, st_lat, nd_lat
sample_od.KANT_end
1857 Haabersti linnaosa
1880 Kesklinna linnaosa
1896 Kristiine linnaosa
1911 Lasnamäe linnaosa
1924 Mustamäe linnaosa
...
17477 Mustamäe linnaosa
17485 Nõmme linnaosa
17497 Peetri Harju
17500 Pirita linnaosa
17504 Põhja-Tallinna linnaosa
Name: KANT_end, Length: 81, dtype: object
ndf = pd.DataFrame(map(substiute,sample_od.KANT_start,sample_od.KANT_end),columns=['start_lon','end_lon','start_lat','end_lat'])
ndf['start'] = sample_od.KANT_start.values
ndf['end'] = sample_od.KANT_end.values
ndf
| start_lon | end_lon | start_lat | end_lat | start | end | |
|---|---|---|---|---|---|---|
| 0 | 24.623326 | 24.623326 | 59.425178 | 59.425178 | Haabersti linnaosa | Haabersti linnaosa |
| 1 | 24.623326 | 24.764738 | 59.425178 | 59.429887 | Haabersti linnaosa | Kesklinna linnaosa |
| 2 | 24.623326 | 24.711209 | 59.425178 | 59.415103 | Haabersti linnaosa | Kristiine linnaosa |
| 3 | 24.623326 | 24.851803 | 59.425178 | 59.433880 | Haabersti linnaosa | Lasnamäe linnaosa |
| 4 | 24.623326 | 24.676419 | 59.425178 | 59.403183 | Haabersti linnaosa | Mustamäe linnaosa |
| ... | ... | ... | ... | ... | ... | ... |
| 76 | 24.702327 | 24.676419 | 59.453550 | 59.403183 | Põhja-Tallinna linnaosa | Mustamäe linnaosa |
| 77 | 24.702327 | 24.683678 | 59.453550 | 59.374473 | Põhja-Tallinna linnaosa | Nõmme linnaosa |
| 78 | 24.702327 | 24.839631 | 59.453550 | 59.383614 | Põhja-Tallinna linnaosa | Peetri Harju |
| 79 | 24.702327 | 24.862742 | 59.453550 | 59.472156 | Põhja-Tallinna linnaosa | Pirita linnaosa |
| 80 | 24.702327 | 24.702327 | 59.453550 | 59.453550 | Põhja-Tallinna linnaosa | Põhja-Tallinna linnaosa |
81 rows × 6 columns
alng = np.array(ndf.iloc[:,:2])
alat = np.array(ndf.iloc[:,2:4])
rlng = alng.reshape(-1)
rlat = alat.reshape(-1)
names = []
start_counts = ndf.groupby('start')['end'].count()
for c, name in zip(start_counts,start_counts.index):
names.append([name] * 2*c)
from itertools import chain
names = list(chain(*names))
od_series = pd.DataFrame(zip(rlng,rlat,names),columns=['lon','lat','locations'])
fig = px.line_mapbox(od_series, lat="lat", lon="lon", color="locations", zoom=3, height=300)
fig.update_layout(mapbox_style="stamen-terrain", mapbox_zoom=10, mapbox_center_lat = 59.3550,
margin={"r":0,"t":20,"l":0,"b":0},height=1200, width=1600,)
fig.show()
od_series.to_csv('Tallinn_od_serie.csv',index=False)